library(ggplot2)
library(devtools)
library(phyloseq)
library(picante)
library(tidyr)
library(forcats)
library(dplyr)
library(tibble)
library(cowplot)
library(picante)    # Will also include ape and vegan 
library(car)        # For residual analysis
library(sandwich)   # for vcovHC function in post-hoc test
library(MASS)       # For studres in plot_residuals function
library(caret)      # For cross validation
library(pander)     # For pretty tables 
source("code/Muskegon_functions.R")
source("code/set_colors.R")

Load data

# Loads a phyloseq object named otu_merged_musk_pruned)
load("data/surface_PAFL_otu_pruned_raw.RData")
# The name of the phyloseq object is: 
surface_PAFL_otu_pruned_raw 
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7806 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 7806 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 7806 tips and 7804 internal nodes ]
# Remove doubletons!
surface_PAFL_otu_pruned_rm2 <- prune_taxa(taxa_sums(surface_PAFL_otu_pruned_raw) > 2, surface_PAFL_otu_pruned_raw) 
surface_PAFL_otu_pruned_rm2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2979 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 2979 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2979 tips and 2977 internal nodes ]
# Remove tree for computational efficiency 
surface_PAFL_otu_pruned_notree_rm2 <- phyloseq(tax_table(surface_PAFL_otu_pruned_rm2), otu_table(surface_PAFL_otu_pruned_rm2), sample_data(surface_PAFL_otu_pruned_rm2))


# Gather the metadata in a dataframe to play with 
metadata <- data.frame(sample_data(surface_PAFL_otu_pruned_notree_rm2)) %>%
      mutate(fraction = factor(fraction, levels = c("WholePart","WholeFree")),
         lakesite = factor(lakesite,  levels = c("MOT", "MDP", "MBR", "MIN")),
         fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))
scaled_surface_PAFLA_pruned_rm2 <- scale_reads(surface_PAFL_otu_pruned_notree_rm2)

set.seed(777)

# Calculate the SOREN distance
soren_whole_dist <- ordinate(
  physeq = scaled_surface_PAFLA_pruned_rm2,
  method = "PCoA",
  distance = "bray",
  binary = TRUE) # Make it presence/absence
 
p1 <- plot_ordination(
  physeq = scaled_surface_PAFLA_pruned_rm2,
  ordination = soren_whole_dist,
  color = "fraction",
  shape = "lakesite",
  title = "Sørensen") +
  geom_point(size=5, alpha = 0.7, aes(fill =fraction,  color = fraction)) +
  scale_colour_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  scale_shape_manual(values = c(21, 22, 23, 24)) +
  theme(legend.position = "bottom")


# Calculate the BRAY-CURTIS distance
bray_whole_dist <- ordinate(
  physeq = scaled_surface_PAFLA_pruned_rm2,
  method = "PCoA",
  distance = "bray",
  binary = FALSE) # Make it Abundance weighted 
 
p2 <- plot_ordination(
  physeq = scaled_surface_PAFLA_pruned_rm2,
  ordination = bray_whole_dist ,
  color = "fraction",
  shape = "lakesite",
  title = "Bray-Curtis") +
  geom_point(size=5, alpha = 0.7, aes(fill =fraction,  color = fraction)) +
  scale_colour_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  scale_shape_manual(values = c(21, 22, 23, 24)) +
  theme(legend.position = "bottom")

plot_grid(p1, p2, labels = c("A", "B"), align = "h", ncol = 2)

Figure 1

######################################################### Fraction ABUNDANCe 
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
  group_by(fraction) %>%
  summarize(mean(as.numeric(fraction_bac_abund)))
## # A tibble: 2 × 2
##   fraction `mean(as.numeric(fraction_bac_abund))`
##     <fctr>                                  <dbl>
## 1 Particle                               41168.88
## 2     Free                              734522.25
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree

poster_a <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"), 
       aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
  ylab("Log10(Bacterial Cells/mL)") +
  ##### Particle vs free cell abundances 
  geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
  annotate("text", x=1.5, y=6.5, fontface = "bold",  size = 3.5, color = "gray40",
           label= paste("***\np =", round(frac_abund_wilcox$p.value, digits = 6))) +
  theme(legend.position = "none",
        axis.title.x = element_blank())



######################################################### TOTAL PRODUCTION 
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
  group_by(fraction) %>%
  summarize(mean(frac_bacprod))
## # A tibble: 2 × 2
##   fraction `mean(frac_bacprod)`
##     <fctr>                <dbl>
## 1 Particle             9.958333
## 2     Free            24.058333
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(67,68,68,67)) # WholePart vs WholeFree


poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) + 
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  ylab("Heterotrophic Production (μgC/L/hr)") +
  ##### Particle vs free bulk production  
  geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
  annotate("text", x=1.5, y=68, fontface = "bold",  size = 3.5, color = "gray40",
           label= paste("**\np =", round(totprod_wilcox$p.value, digits = 3))) +
  theme(legend.position = "none",
        axis.title.x = element_blank())



######################################################### TOTAL PRODUCTION 
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
  group_by(fraction) %>%
  summarize(mean(fracprod_per_cell))
## # A tibble: 2 × 2
##   fraction `mean(fracprod_per_cell)`
##     <fctr>                     <dbl>
## 1 Particle              4.816116e-07
## 2     Free              3.866798e-08
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree


poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"), 
       aes(y = log10(fracprod_per_cell), x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  ylim(c(-8.5, -5)) + 
  ylab("log10(production/cell) (μgC/cell/hr)") +
  ##### Particle vs free per-cell production 
  geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
  annotate("text", x=1.5, y=-5, fontface = "bold",  size = 3.5, color = "gray40",
           label= paste("***\np =", round(percellprod_wilcox$p.value, digits = 5))) +
  theme(legend.position = "none",
        axis.title.x = element_blank())

plot_grid(poster_a, poster_b, poster_c,
          labels = c("A", "B", "C"),
          ncol = 3)

Figure S1

work_df <- metadata %>%
  dplyr::select(norep_filter_name, fraction, fraction_bac_abund, frac_bacprod, fracprod_per_cell_noinf) %>%
  mutate(norep_water_name = paste(substr(norep_filter_name, 1,4), substr(norep_filter_name, 6,9), sep = "")) %>%
  dplyr::select(-norep_filter_name)

part_work_df <- work_df %>%
  filter(fraction == "Particle") %>%
  rename(part_bacabund = fraction_bac_abund,
         part_prod = frac_bacprod, 
         part_percell_prod = fracprod_per_cell_noinf) %>%
  dplyr::select(-fraction)

free_work_df <- work_df %>%
  filter(fraction == "Free") %>%
  rename(free_bacabund = fraction_bac_abund,
         free_prod = frac_bacprod, 
         free_percell_prod = fracprod_per_cell_noinf) %>%
  dplyr::select(-fraction)

byfrac_df <- part_work_df %>%
  left_join(free_work_df, by = "norep_water_name")

summary(lm(log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, norep_water_name != "MOTE515")))
## 
## Call:
## lm(formula = log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, 
##     norep_water_name != "MOTE515"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52690 -0.08048  0.05903  0.19565  0.35255 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)            2.0900     3.2247   0.648    0.533
## log10(free_bacabund)   0.4264     0.5532   0.771    0.461
## 
## Residual standard error: 0.3111 on 9 degrees of freedom
## Multiple R-squared:  0.06194,    Adjusted R-squared:  -0.04229 
## F-statistic: 0.5943 on 1 and 9 DF,  p-value: 0.4605
plot1 <- ggplot(filter(byfrac_df, norep_water_name != "MOTE515"), 
       aes(x = log10(free_bacabund), y = log10(part_bacabund))) +
  xlab("Free") +  ylab("Particle") + 
  ggtitle("Log10(cells/mL)") +
  geom_point(size = 3, shape = 21, fill = "grey") 


lm_prod_corr <- lm(part_prod ~ free_prod, data = byfrac_df)

plot2 <- ggplot(byfrac_df, aes(x = free_prod, y = part_prod)) +
  xlab("Free") +  ylab("Particle") + 
  ggtitle("Heterotrophic Production \n(μgC/L/hr)") +
  geom_point(size = 3, shape = 21, fill = "grey") +
  geom_smooth(method = "lm", color = "black")  + 
  annotate("text", x =20, y= 30, color = "black", fontface = "bold", size = 4,
       label = paste("R2 =", round(summary(lm_prod_corr)$adj.r.squared, digits = 3), "\n", 
                     "p =", round(unname(summary(lm_prod_corr)$coefficients[,4][2]), digits = 3))) 


lm_percell_corr <- lm(log10(part_percell_prod) ~ log10(free_percell_prod), data = byfrac_df)

plot3 <- ggplot(byfrac_df,
       aes(x = log10(free_percell_prod), y = log10(part_percell_prod))) +  
  xlab("Free") +  ylab("Particle") + 
  ggtitle("log10(production/cell) \n (μgC/cell/hr)") +
  geom_point(size = 3, shape = 21, fill = "grey") +
  geom_smooth(method = "lm", color = "black") + 
  annotate("text", y = -5.8, x=-7.9, color = "black", fontface = "bold", size = 4,
       label = paste("R2 =", round(summary(lm_percell_corr)$adj.r.squared, digits = 3), "\n", 
                     "p =", round(unname(summary(lm_percell_corr)$coefficients[,4][2]), digits = 3))) 

plot_grid(plot1, plot2, plot3, 
          nrow = 1, ncol = 3, 
          labels = c("A", "B", "C"), 
          align = "h")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on '(μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on '(μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Calculate Community Diversity

# Set the seed for reproducibility
set.seed(777)

# Calculate the alpha diversity with 100 repsampling events
alpha_calc <- calc_alpha_diversity(physeq = surface_PAFL_otu_pruned_notree_rm2)

# What was the minimum sample size? 
min(sample_sums(surface_PAFL_otu_pruned_notree_rm2)) - 1
## [1] 6489
# Put it altogether in a dataframe 
otu_alphadiv <- calc_mean_alphadiv(physeq = surface_PAFL_otu_pruned_notree_rm2,
                           richness_df = alpha_calc$Richness, 
                           evenness_df = alpha_calc$Inverse_Simpson, 
                           shannon_df = alpha_calc$Shannon) %>%
    mutate(fraction = factor(fraction, levels = c("WholePart","WholeFree")),
         lakesite = factor(lakesite,  levels = c("MOT", "MDP", "MBR", "MIN")),
         measure = factor(measure, levels = c("Richness",  "Shannon_Entropy", "Inverse_Simpson", "Simpsons_Evenness")),
         fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"),
         lakesite = fct_recode(lakesite, Outlet = "MOT", Deep = "MDP", Bear = "MBR", River = "MIN"))

Correlations between Diversity Measures

##########################################################################
###########################   CORRELATIONS   #############################
##########################################################################
# RICHNESS vs SHANNON
cor(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")$mean,
    filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")$mean) # YES
## [1] 0.9406491
# SHANNON VS INVERSE SIMPSON
cor(filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")$mean,
    filter(otu_alphadiv, measure == "Inverse_Simpson" &  fraction == "Particle")$mean) # YES
## [1] 0.9651242
# INVERSE SIMPSON VS SIMPSONS EVENNESS
cor(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")$mean,
    filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")$mean) # YES
## [1] 0.9145095
# SIMPSONS EVENNESS VS RICHNESS
cor(filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")$mean,
    filter(otu_alphadiv, measure == "Richness" &  fraction == "Particle")$mean) # YES
## [1] 0.6881205
ggplot(otu_alphadiv, aes(y = mean, x = fraction)) +
  ylab("Mean Diversity Value") +
  facet_wrap(~measure, scale = "free_y", ncol = 4) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), color = "black") +
  geom_boxplot(alpha = 0.5, outlier.shape = NA, color = "black", aes(fill = fraction)) +
  scale_fill_manual(values = fraction_colors) + 
  scale_color_manual(values = fraction_colors) + 
  theme(legend.position = "none", axis.title.x = element_blank()) 

ggplot(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle"), 
              aes(x = lakesite, y = mean, fill = lakesite)) + 
  geom_jitter(size = 3, shape = 21) + 
  ylab("Observed Richness") + 
  ggtitle("Particle Fraction Only") +  
  geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) + 
  scale_fill_manual(values = lakesite_colors)

otu_alphadiv %>%
  filter(measure == "Richness" & fraction == "Particle") %>%
  group_by(lakesite) %>%
  summarize(mean(mean), sd(mean))
## # A tibble: 4 × 3
##   lakesite `mean(mean)` `sd(mean)`
##     <fctr>        <dbl>      <dbl>
## 1   Outlet     430.8567   81.99728
## 2     Deep     472.0833   23.48010
## 3     Bear     637.9933  242.53768
## 4    River     686.2633  135.20325
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle"), 
              aes(x = lakesite, y = mean, fill = lakesite)) + 
  geom_jitter(size = 3, shape = 21) + 
  ggtitle("Particle Fraction Only") + 
  geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) + 
  scale_fill_manual(values = lakesite_colors)

otu_alphadiv %>%
  filter(measure == "Inverse_Simpson" & fraction == "Particle") %>%
  group_by(lakesite) %>%
  summarize(mean(mean))
## # A tibble: 4 × 2
##   lakesite `mean(mean)`
##     <fctr>        <dbl>
## 1   Outlet     23.66717
## 2     Deep     23.76370
## 3     Bear     35.36997
## 4    River     59.10553

Linear Regression Statistics

#################################### Subset Dataframes 
richness <- filter(otu_alphadiv, measure == "Richness")
shannon <- filter(otu_alphadiv, measure == "Shannon_Entropy")
invsimps <- filter(otu_alphadiv, measure == "Inverse_Simpson")
simpseven <- filter(otu_alphadiv, measure == "Simpsons_Evenness")


#################################### Bulk Measure Production #################################### 
################### Richness ################### 
summary(lm(frac_bacprod ~ mean, data = richness))  # All samples together
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = richness)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.057 -12.017  -5.654   9.256  46.605 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 18.501168   9.039579   2.047   0.0528 .
## mean        -0.003335   0.018911  -0.176   0.8616  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared:  0.001412,   Adjusted R-squared:  -0.04398 
## F-statistic: 0.0311 on 1 and 22 DF,  p-value: 0.8616
# Particle-Associated 
lm_prod_vs_rich_PA <- lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_prod_vs_rich_PA)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1271 -2.8865 -0.4649  3.7537  9.8401 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -11.269531   5.717450  -1.971  0.07701 . 
## mean          0.038125   0.009868   3.863  0.00314 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared:  0.5988, Adjusted R-squared:  0.5587 
## F-statistic: 14.93 on 1 and 10 DF,  p-value: 0.003143
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_rich_PA <- train(
      frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"), 
      method ='lm', 
      trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100), 
      tuneGrid = expand.grid(intercept = TRUE))

cv_lm_prod_vs_rich_PA$results   # Particle-Associated CV results 
##   intercept     RMSE  Rsquared   RMSESD RsquaredSD
## 1      TRUE 6.607118 0.6566796 2.271972  0.2865878
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Free"))) # Free Living 
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.094 -11.112  -2.755   8.611  33.308 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.40793   21.62006   0.250    0.808
## mean         0.05511    0.06207   0.888    0.395
## 
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared:  0.07306,    Adjusted R-squared:  -0.01963 
## F-statistic: 0.7882 on 1 and 10 DF,  p-value: 0.3955
################### Shannon Entropy ################### 
summary(lm(frac_bacprod ~ mean, data = shannon))  # All samples together
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = shannon)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.659 -11.878  -5.666   9.231  46.593 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.63373   26.71652   0.623    0.540
## mean         0.08797    6.22969   0.014    0.989
## 
## Residual standard error: 15.55 on 22 degrees of freedom
## Multiple R-squared:  9.064e-06,  Adjusted R-squared:  -0.04545 
## F-statistic: 0.0001994 on 1 and 22 DF,  p-value: 0.9889
# Particle-Associated 
lm_prod_vs_shannon_PA <- lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_prod_vs_shannon_PA)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9022 -2.9150 -0.5875  1.6713 12.0544 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -40.320     14.007  -2.878  0.01643 * 
## mean          11.089      3.068   3.614  0.00473 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.693 on 10 degrees of freedom
## Multiple R-squared:  0.5664, Adjusted R-squared:  0.5231 
## F-statistic: 13.06 on 1 and 10 DF,  p-value: 0.004734
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_shannon_PA <- train(
      frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"), 
      method ='lm', 
      trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100), 
      tuneGrid = expand.grid(intercept = TRUE))

cv_lm_prod_vs_shannon_PA$results   # Particle-Associated CV results 
##   intercept    RMSE  Rsquared  RMSESD RsquaredSD
## 1      TRUE 6.58008 0.6745309 2.49991   0.268315
summary(lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living 
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.004 -10.708  -3.738   6.632  37.129 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -13.37      73.87  -0.181    0.860
## mean            9.40      18.50   0.508    0.622
## 
## Residual standard error: 18.15 on 10 degrees of freedom
## Multiple R-squared:  0.02516,    Adjusted R-squared:  -0.07233 
## F-statistic: 0.2581 on 1 and 10 DF,  p-value: 0.6225
################### Inverse Simpson ################### 
summary(lm(frac_bacprod ~ mean, data = invsimps))  # All samples together
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = invsimps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.269 -10.298  -4.916   5.866  46.452 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  12.1577     6.0225   2.019   0.0559 .
## mean          0.1629     0.1731   0.941   0.3570  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared:  0.03868,    Adjusted R-squared:  -0.005019 
## F-statistic: 0.8851 on 1 and 22 DF,  p-value: 0.357
# Particle-Associated samples 
lm_prod_vs_invsimps_PA <- lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_prod_vs_invsimps_PA)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.5589 -2.1093 -0.1969  0.8752  7.8282 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.45199    2.43866  -0.185  0.85666    
## mean         0.29344    0.05781   5.076  0.00048 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared:  0.7204, Adjusted R-squared:  0.6925 
## F-statistic: 25.77 on 1 and 10 DF,  p-value: 0.0004804
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_invsimps_PA <- train(
      frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"), 
      method ='lm', 
      trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100), 
      tuneGrid = expand.grid(intercept = TRUE))

cv_lm_prod_vs_invsimps_PA$results       # Cross Validated PA results
##   intercept    RMSE  Rsquared   RMSESD RsquaredSD
## 1      TRUE 5.31148 0.7499916 2.119997  0.2619402
#Free Living Samples 
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.765  -9.356  -4.445   6.116  36.017 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  11.0985    16.7052   0.664    0.521
## mean          0.5379     0.6598   0.815    0.434
## 
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared:  0.06233,    Adjusted R-squared:  -0.03143 
## F-statistic: 0.6648 on 1 and 10 DF,  p-value: 0.4339
################### Simpson's Evenness ################### 
summary(lm(frac_bacprod ~ mean, data = simpseven))  # All samples together
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = simpseven)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.000  -7.163  -3.815   3.392  45.845 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -0.8524     9.2286  -0.092   0.9272  
## mean        274.1606   134.4253   2.040   0.0536 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.26 on 22 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.1208 
## F-statistic:  4.16 on 1 and 22 DF,  p-value: 0.05359
# Particle-Associated 
lm_prod_vs_simpseven_PA <- lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_prod_vs_simpseven_PA)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.973  -2.229  -1.086   1.356  12.380 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   -3.829      4.539  -0.844  0.41865   
## mean         234.057     71.238   3.286  0.00821 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.995 on 10 degrees of freedom
## Multiple R-squared:  0.5191, Adjusted R-squared:  0.471 
## F-statistic: 10.79 on 1 and 10 DF,  p-value: 0.008211
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_simpseven_PA <- train(
      frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"), 
      method ='lm', 
      trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100), 
      tuneGrid = expand.grid(intercept = TRUE))

cv_lm_prod_vs_simpseven_PA$results   # Particle-Associated CV results 
##   intercept     RMSE  Rsquared   RMSESD RsquaredSD
## 1      TRUE 6.239458 0.6493075 2.882057  0.2950225
summary(lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living 
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.389 -12.029  -3.306   4.668  39.946 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    15.85      23.52   0.674    0.516
## mean          114.96     321.02   0.358    0.728
## 
## Residual standard error: 18.27 on 10 degrees of freedom
## Multiple R-squared:  0.01266,    Adjusted R-squared:  -0.08607 
## F-statistic: 0.1282 on 1 and 10 DF,  p-value: 0.7277
#################################### Per-Cell Production #################################### 
################### Richness ################### 
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))  # All samples together 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = richness)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8668 -0.2124  0.1045  0.2133  0.6473 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.4591277  0.2212633 -38.231  < 2e-16 ***
## mean         0.0028988  0.0004641   6.246  3.4e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3804 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6501, Adjusted R-squared:  0.6334 
## F-statistic: 39.02 on 1 and 21 DF,  p-value: 3.395e-06
# Particle-Associated 
lm_percell_prod_vs_rich_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_percell_prod_vs_rich_PA)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55347 -0.21545 -0.01066  0.12536  0.58830 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.0617648  0.3717671 -21.685 4.44e-09 ***
## mean         0.0023794  0.0006348   3.748  0.00457 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3506 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6095, Adjusted R-squared:  0.5662 
## F-statistic: 14.05 on 1 and 9 DF,  p-value: 0.004567
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_rich_PA <- train(
      log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle" & norep_filter_name != "MOTEJ515"), 
      method ='lm', 
      trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100), 
      tuneGrid = expand.grid(intercept = TRUE))

cv_lm_percell_prod_vs_rich_PA$results      # Particle-Associated CV results 
##   intercept      RMSE  Rsquared    RMSESD RsquaredSD
## 1      TRUE 0.4185395 0.6324071 0.1481733  0.3240003
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Free"))) # Free Living 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71719 -0.13833  0.07155  0.23581  0.56949 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.135758   0.471253 -17.264 8.99e-09 ***
## mean         0.001657   0.001353   1.225    0.249    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared:  0.1304, Adjusted R-squared:  0.04344 
## F-statistic: 1.499 on 1 and 10 DF,  p-value: 0.2488
################### Shannon Entropy ################### 
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))  # All samples together 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = shannon)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.03514 -0.15042 -0.03394  0.26568  0.82794 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.9036     0.7534 -14.472 2.14e-12 ***
## mean          0.8805     0.1763   4.993 6.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4348 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5428, Adjusted R-squared:  0.521 
## F-statistic: 24.93 on 1 and 21 DF,  p-value: 6.09e-05
# Particle-Associated 
lm_percell_prod_vs_shannon_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_percell_prod_vs_shannon_PA)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36686 -0.23571 -0.01330  0.03961  0.70312 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -9.8897     0.8820 -11.213 1.37e-06 ***
## mean          0.6993     0.1935   3.614  0.00562 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3584 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5921, Adjusted R-squared:  0.5468 
## F-statistic: 13.06 on 1 and 9 DF,  p-value: 0.00562
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_shannon_PA <- train(
      log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle" & norep_filter_name != "MOTEJ515"), 
      method ='lm', 
      trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100), 
      tuneGrid = expand.grid(intercept = TRUE))

cv_lm_percell_prod_vs_shannon_PA$results      # Particle-Associated CV results 
##   intercept      RMSE  Rsquared    RMSESD RsquaredSD
## 1      TRUE 0.4135852 0.6756987 0.1418017  0.2988697
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72444 -0.17785  0.08114  0.13946  0.67366 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.8666     1.6332  -5.429 0.000289 ***
## mean          0.3243     0.4091   0.793 0.446298    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4014 on 10 degrees of freedom
## Multiple R-squared:  0.05914,    Adjusted R-squared:  -0.03495 
## F-statistic: 0.6285 on 1 and 10 DF,  p-value: 0.4463
################### Inverse Simpson ################### 
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))  # All samples together 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97302 -0.28199 -0.05285  0.32003  0.99088 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.86039    0.18153 -43.301  < 2e-16 ***
## mean         0.02355    0.00525   4.485 0.000204 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4596 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4893, Adjusted R-squared:  0.4649 
## F-statistic: 20.12 on 1 and 21 DF,  p-value: 0.0002037
# Particle-Associated 
lm_percell_prod_vs_invsimps_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_percell_prod_vs_invsimps_PA)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28651 -0.18384 -0.11125  0.07337  0.56444 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.360961   0.159490 -46.153 5.27e-12 ***
## mean         0.018087   0.003759   4.812 0.000958 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2969 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7201, Adjusted R-squared:  0.689 
## F-statistic: 23.15 on 1 and 9 DF,  p-value: 0.0009581
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_invsimps_PA <- train(
      log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle" & norep_filter_name != "MOTEJ515"), 
      method ='lm', 
      trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100), 
      tuneGrid = expand.grid(intercept = TRUE))

cv_lm_percell_prod_vs_invsimps_PA$results      # Particle-Associated CV results 
##   intercept      RMSE  Rsquared    RMSESD RsquaredSD
## 1      TRUE 0.3550179 0.7224563 0.1143556  0.3292516
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Free"))) # Free Living 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68269 -0.11512  0.01325  0.17742  0.63175 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.03513    0.35685 -22.517 6.72e-10 ***
## mean         0.01910    0.01409   1.355    0.205    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared:  0.1551, Adjusted R-squared:  0.07064 
## F-statistic: 1.836 on 1 and 10 DF,  p-value: 0.2052
################### Simpson's Evenness ################### 
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))  # All samples together 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.06874 -0.41920  0.04553  0.34511  1.56565 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.4496     0.4116  -18.10 2.74e-14 ***
## mean          4.3470     6.0344    0.72    0.479    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6353 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02411,    Adjusted R-squared:  -0.02236 
## F-statistic: 0.5189 on 1 and 21 DF,  p-value: 0.4792
# Particle-Associated 
lm_percell_prod_vs_simpseven_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_percell_prod_vs_simpseven_PA)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, 
##     fraction == "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4726 -0.2230 -0.1106  0.1248  0.6887 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.5941     0.2885 -26.324 7.96e-10 ***
## mean         15.1887     4.6341   3.278  0.00957 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3788 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5441, Adjusted R-squared:  0.4935 
## F-statistic: 10.74 on 1 and 9 DF,  p-value: 0.009566
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_simpseven_PA <- train(
      log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle" & norep_filter_name != "MOTEJ515"), 
      method ='lm', 
      trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100), 
      tuneGrid = expand.grid(intercept = TRUE))

cv_lm_percell_prod_vs_simpseven_PA$results      # Particle-Associated CV results 
##   intercept      RMSE  Rsquared    RMSESD RsquaredSD
## 1      TRUE 0.4413381 0.6737451 0.1365141  0.2962024
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63112 -0.24216  0.01388  0.14672  0.77426 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.9274     0.5202 -15.240    3e-08 ***
## mean          4.9361     7.1008   0.695    0.503    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4041 on 10 degrees of freedom
## Multiple R-squared:  0.04609,    Adjusted R-squared:  -0.0493 
## F-statistic: 0.4832 on 1 and 10 DF,  p-value: 0.5028

R2 table

########## PUT A TABLE TOGETHER 
# Per Liter Production 
perliter_row1 <- c("Richness", "Per-Liter",
          round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 3),
          round(cv_lm_prod_vs_rich_PA$results$Rsquared, digits = 3), 
          round(cv_lm_prod_vs_rich_PA$results$RsquaredSD, digits = 3))

perliter_row2 <- c("Shannon_Entropy", "Per-Liter",
          round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
          round(cv_lm_prod_vs_shannon_PA$results$Rsquared, digits = 3), 
          round(cv_lm_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))

perliter_row3 <- c("Inverse_Simpson", "Per-Liter",
          round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
          round(cv_lm_prod_vs_invsimps_PA$results$Rsquared, digits = 3), 
          round(cv_lm_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))

perliter_row4 <- c("Simpsons_Evenness","Per-Liter",
          round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
          round(cv_lm_prod_vs_simpseven_PA$results$Rsquared, digits = 3), 
          round(cv_lm_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))



# Per cell production 
percell_row1 <- c("Richness", "Per-Cell",
          round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 3),
          round(cv_lm_percell_prod_vs_rich_PA$results$Rsquared, digits = 3), 
          round(cv_lm_percell_prod_vs_rich_PA$results$RsquaredSD, digits = 3))

percell_row2 <- c("Shannon_Entropy", "Per-Cell",
          round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
          round(cv_lm_percell_prod_vs_shannon_PA$results$Rsquared, digits = 3), 
          round(cv_lm_percell_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))

percell_row3 <- c("Inverse_Simpson", "Per-Cell",
          round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
          round(cv_lm_percell_prod_vs_invsimps_PA$results$Rsquared, digits = 3), 
          round(cv_lm_percell_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))

percell_row4 <- c("Simpsons_Evenness", "Per-Cell",
          round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
          round(cv_lm_percell_prod_vs_simpseven_PA$results$Rsquared, digits = 3), 
          round(cv_lm_percell_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))

r2_table <- as.data.frame(rbind(perliter_row1, perliter_row2, perliter_row3, perliter_row4,
                                percell_row1, percell_row2, percell_row3, percell_row4))
colnames(r2_table) <- c("Diversity_Metric", "Production_Measure","Adjusted_R2","CV_R2", "CV_R2_SD")
row.names(r2_table) = NULL

pander(r2_table,
               caption = "Supplemental Table 1: \n R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.")
Supplemental Table 1: R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.
Diversity_Metric Production_Measure Adjusted_R2 CV_R2 CV_R2_SD
Richness Per-Liter 0.559 0.657 0.287
Shannon_Entropy Per-Liter 0.523 0.675 0.268
Inverse_Simpson Per-Liter 0.692 0.75 0.262
Simpsons_Evenness Per-Liter 0.471 0.649 0.295
Richness Per-Cell 0.566 0.632 0.324
Shannon_Entropy Per-Cell 0.547 0.676 0.299
Inverse_Simpson Per-Cell 0.689 0.722 0.329
Simpsons_Evenness Per-Cell 0.493 0.674 0.296

Prepare Figure 2

######################################################### OBSERVED RICHNESS 
rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
rich_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mean by fraction
## W = 129, p-value = 0.0004955
## alternative hypothesis: true location shift is not equal to 0
filter(richness) %>%
  group_by(fraction) %>%
  summarize(mean(mean), sd(mean))
## # A tibble: 2 × 3
##   fraction `mean(mean)` `sd(mean)`
##     <fctr>        <dbl>      <dbl>
## 1 Particle     556.7992  167.31404
## 2     Free     338.4242   85.98665
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree

rich_distribution_plot <- 
  ggplot(richness, aes(y = mean, x = fraction)) +
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) + 
  xlab("Observed Richness") + xlab("Fraction") + 
  geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=790, fontface = "bold",  size = 4, color = "#424645",
           label= paste("***\np =", round(rich_fraction_wilcox$p.value, digits = 5))) +
  theme(legend.position = "none",# axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()


# Richness vs Per-Liter Heterotrophic Production Plot 
prod_vs_rich_plot <-  
  ggplot(richness, aes(x=mean, y=frac_bacprod)) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) +  # Y-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("Heterotrophic Production \n (μgC/L/hr)") + 
  xlab("Observed Richness") +
  geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) + 
  annotate("text", x = 790, y=45, color = "#FF6600", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
  theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank()) 


# Richness vs Per-cell herterotrophic production Plot 
percell_prod_vs_rich_plot <- 
  ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("log10(production/cell)\n (μgC/cell/hr)") +
  xlab("Observed Richness") +
  geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) + 
  #scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) + 
  annotate("text", x = 790, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_percell_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) + 
  #annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))


# All 3 richness plots together 
rich_plots <- plot_grid(rich_distribution_plot, prod_vs_rich_plot, percell_prod_vs_rich_plot,
          labels = c("A", "C", "E"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### INVERSE SIMPSON
invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
invsimps_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mean by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(invsimps) %>%
  group_by(fraction) %>%
  summarize(mean(mean), sd(mean))
## # A tibble: 2 × 3
##   fraction `mean(mean)` `sd(mean)`
##     <fctr>        <dbl>      <dbl>
## 1 Particle     35.47659  23.843325
## 2     Free     24.09219   8.136901
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree

invsimps_distribution_plot <- 
  ggplot(invsimps, aes(y = mean, x = fraction)) +
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) + 
  ylab("Inverse Simpson") + xlab("Fraction") + 
  geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=80, fontface = "bold",  size = 4, color = "#424645", label= "NS") +
  theme(legend.position = "none", #axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()


# INVERSE SIMPSON 
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production  
prod_vs_invsimps_plot <-  
  ggplot(invsimps, aes(x=mean, y=frac_bacprod)) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) +  # Y-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("Heterotrophic Production \n (μgC/L/hr)") + 
  xlab("Inverse Simpson") +
  geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) + 
  annotate("text", x = 70, y=45, color = "#FF6600", fontface = "bold",size = 4,
           label = paste("R2 =", round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
  theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank()) 



# Inverse Simpson vs per-cell production Plot 
percell_prod_vs_invsimps_plot <- 
  ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("log10(production/cell)\n (μgC/cell/hr)") +
  xlab("Inverse Simpson") +
  geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) + 
  #scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) + 
  annotate("text", x = 70, y=-7.5, color = "#FF6600", fontface = "bold",size = 4,
           label = paste("R2 =", round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_percell_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))


# Plot Inverse simpson plots altogether
invsimps_plots <- plot_grid(invsimps_distribution_plot, prod_vs_invsimps_plot, percell_prod_vs_invsimps_plot,
          labels = c("B", "D", "F"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Plot Figure 2

invsimps_plots_noyaxis <- 
  plot_grid(invsimps_distribution_plot + theme(axis.title.y = element_blank(), axis.text.y = element_blank()), 
            prod_vs_invsimps_plot + theme(axis.title.y = element_blank()), 
            percell_prod_vs_invsimps_plot + theme(axis.title.y = element_blank()), 
          labels = c("B", "D", "F"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
plot_grid(rich_plots, invsimps_plots_noyaxis,
          ncol = 2, nrow = 1, rel_widths = c(1, 0.825),
          align = "h")

Prepare Figure S2

######################################################### SHANNON_ENTROPY
shannon_fraction_wilcox <- wilcox.test(mean ~ fraction, data = shannon)
shannon_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mean by fraction
## W = 119, p-value = 0.00556
## alternative hypothesis: true location shift is not equal to 0
filter(shannon) %>%
  group_by(fraction) %>%
  summarize(mean(mean), sd(mean))
## # A tibble: 2 × 3
##   fraction `mean(mean)` `sd(mean)`
##     <fctr>        <dbl>      <dbl>
## 1 Particle     4.534078  0.5594638
## 2     Free     3.982314  0.2958221
# Make a data frame to draw significance line in boxplot (visually calculated)
shannon_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(5.8, 5.9, 5.9, 5.8)) # WholePart vs WholeFree

shannon_distribution_plot <- 
  ggplot(shannon, aes(y = mean, x = fraction)) +
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) + 
  xlab("Shannon Entropy") +
  xlab("Fraction") + 
    # Add line and pval
  geom_path(data = shannon_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=5.6, fontface = "bold",  size = 4, color = "#424645",
           label= paste("***\np =", round(shannon_fraction_wilcox$p.value, digits = 3))) +
  theme(legend.position = "none",# axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()


# Shannon vs Per-Liter Heterotrophic Production Plot 
prod_vs_shannon_plot <-  
  ggplot(shannon, aes(x=mean, y=frac_bacprod)) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) +  # Y-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("Heterotrophic Production \n (μgC/L/hr)") + 
  scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) + 
  xlab("Shannon Entropy") +
  geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  annotate("text", x = 5.25, y=45, color = "#FF6600", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
  theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank()) 


# Richness vs Per-cell herterotrophic production Plot 
percell_prod_vs_shannon_plot <- 
  ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("log10(production/cell)\n (μgC/cell/hr)") +
  xlab("Shannon Entropy") +
  geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) + 
  #scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) + 
  annotate("text", x = 5.25, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_percell_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) + 
  #annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))



shannon_plots <- plot_grid(shannon_distribution_plot, prod_vs_shannon_plot, percell_prod_vs_shannon_plot,
          labels = c("A", "C", "E"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_errorbar).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### INVERSE SIMPSON
simpseven_fraction_wilcox <- wilcox.test(mean ~ fraction, data = simpseven)
simpseven_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mean by fraction
## W = 55, p-value = 0.3474
## alternative hypothesis: true location shift is not equal to 0
filter(simpseven) %>%
  group_by(fraction) %>%
  summarize(mean(mean), sd(mean))
## # A tibble: 2 × 3
##   fraction `mean(mean)` `sd(mean)`
##     <fctr>        <dbl>      <dbl>
## 1 Particle   0.05890559 0.02537484
## 2     Free   0.07138806 0.01716014
# Make a data frame to draw significance line in boxplot (visually calculated)
simpseven_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(0.14, 0.15, 0.15, 0.14)) # WholePart vs WholeFree

simpseven_distribution_plot <- 
  ggplot(simpseven, aes(y = mean, x = fraction)) +
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) + 
  ylab("Simpson's Evenness") +
  xlab("Fraction") + 
  geom_path(data = simpseven_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=0.14, fontface = "bold",  size = 4, color = "#424645", label= "NS") +
  theme(legend.position = "none",
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()



# SIMPSONS EVENNESS
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production  
prod_vs_simpseven_plot <-  
  ggplot(simpseven, aes(x=mean, y=frac_bacprod)) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) +  # Y-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("Heterotrophic Production \n (μgC/L/hr)") + 
  ylab("Simpson's Evenness") +
  geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) + 
  annotate("text", x = 0.14, y=15, color = "#FF6600", fontface = "bold",size = 4,
           label = paste("R2 =", round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 3))) +
  theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank()) 




# Plot 
percell_prod_vs_simpseven_plot <- 
  ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("log10(production/cell)\n (μgC/cell/hr)") +
  xlab("Simpson's Evenness") +
  geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) + 
  annotate("text", x = 0.14, y=-6.3, color = "#FF6600", fontface = "bold",size = 4,
           label = paste("R2 =", round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_percell_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 4))) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))



simpseven_plots <- plot_grid(simpseven_distribution_plot, prod_vs_simpseven_plot, percell_prod_vs_simpseven_plot,
          labels = c("B", "D", "F"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Plot Figure S2

simpseven_plots_noyaxis <- 
  plot_grid(simpseven_distribution_plot + theme(axis.title.y = element_blank(), axis.text.y = element_blank()), 
            prod_vs_simpseven_plot + theme(axis.title.y = element_blank()), 
            percell_prod_vs_simpseven_plot + theme(axis.title.y = element_blank()), 
          labels = c("B", "D", "F"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
plot_grid(shannon_plots, simpseven_plots_noyaxis,
          ncol = 2, nrow = 1, rel_widths = c(1, 0.825),
          align = "h")

lm_simpseven_bulkprod <- lm(frac_bacprod ~ mean, 
                            data = filter(otu_alphadiv, measure == "Simpsons_Evenness"))

ggplot(filter(otu_alphadiv, measure == "Simpsons_Evenness"), 
       aes(x = mean, y = frac_bacprod)) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) +  # Y-axis errorbars
  ylab("Heterotrophic Production \n(ug C/L/hr)") +
  xlab("Simpson's Evenness") + 
  geom_point(size = 3, shape = 21, aes(fill = fraction)) + 
  geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
  scale_fill_manual(values = fraction_colors) +
  scale_color_manual(values = fraction_colors) +
  theme(legend.position = c(0.15, 0.9), legend.title = element_blank()) +
  annotate("text", x = 0.115, y=55, color = "#424645", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm_simpseven_bulkprod)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_simpseven_bulkprod)$coefficients[,4][2]), digits = 3))) 

Prepare Figure S3

plot_all_rich_percell <- 
  ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", fill =  "grey") +
  ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Observed Richness") +
  geom_smooth(method='lm', color = "#424645", fill = "#424645") + 
  scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) + 
  scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) + 
  annotate("text", x = 790, y=-8, color = "#424645", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$coefficients[,4][2]), digits = 6))) + 
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))

plot_all_shannon_percell <- 
  ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", fill =  "grey") +
  ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Shannon Entropy") +
  geom_smooth(method='lm', color = "#424645", fill = "#424645") + 
  scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) + 
  scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) + 
  annotate("text", x = 5.25, y=-8, color = "#424645", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$coefficients[,4][2]), digits = 5))) + 
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))


plot_all_invsimps_percell <- 
  ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", fill =  "grey") +
  ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Inverse Simpson") +
  geom_smooth(method='lm', color = "#424645", fill = "#424645") + 
  scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) + 
  scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) + 
  annotate("text", x = 70, y=-8, color = "#424645", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$coefficients[,4][2]), digits = 5))) + 
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))


plot_all_simpseven_percell <- 
  ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", fill =  "grey") +
  ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Inverse Simpson") +
  geom_smooth(method='lm', color = "#424645", fill = "#424645") + 
  scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) + 
  scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) + 
  annotate("text", x = 0.12, y=-8, color = "#424645", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$coefficients[,4][2]), digits = 2))) + 
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))


plot_grid(plot_all_rich_percell, 
          plot_all_shannon_percell + theme(axis.title.y = element_blank()), 
          plot_all_invsimps_percell + theme(axis.title.y = element_blank()), 
          plot_all_simpseven_percell + theme(axis.title.y = element_blank()),
          align = "h", labels = c("A", "B", "C", "D"),
          rel_widths = c(1.05, 0.9, 0.9, 0.9),
          nrow = 1, ncol = 4)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_errorbarh).
## Warning: Removed 1 rows containing missing values (geom_point).

Calculate the Phylogenetic Diversity

# Read in the tree
RAREFIED_rm2_fasttree <- read.tree(file = "data/PhyloTree/newick_tree_rm2_rmN.tre")
  
# Load in data that has doubletons removed 
load("data/PhyloTree/surface_PAFL_otu_pruned_RAREFIED_rm2.RData")
surface_PAFL_otu_pruned_RAREFIED_rm2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1891 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 70 sample variables ]
## tax_table()   Taxonomy Table:    [ 1891 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1891 tips and 1889 internal nodes ]
# Create the OTU table for picante 
surface_PAFL_RAREFIED_rm2_otu <- matrix(otu_table(surface_PAFL_otu_pruned_RAREFIED_rm2), nrow = nrow(otu_table(surface_PAFL_otu_pruned_RAREFIED_rm2)))
rownames(surface_PAFL_RAREFIED_rm2_otu) <- sample_names(surface_PAFL_otu_pruned_RAREFIED_rm2)
colnames(surface_PAFL_RAREFIED_rm2_otu) <- taxa_names(surface_PAFL_otu_pruned_RAREFIED_rm2)
    
  
## Calculate input for SES_MPD  
# Convert the abundance data to standardized abundanced vegan function `decostand' , NOTE: method = "total"
otu_decostand_total <- decostand(surface_PAFL_RAREFIED_rm2_otu, method = "total")
# check total abundance in each sample
apply(otu_decostand_total, 1, sum)
## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915 
##        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1
# check for mismatches/missing species between community data and phylo tree
RAREFIED_rm2_matches <- match.phylo.comm(RAREFIED_rm2_fasttree, otu_decostand_total)
# the resulting object is a list with $phy and $comm elements.  replace our
# original data with the sorted/matched data
phy_RAREFIED_rm2 <- RAREFIED_rm2_matches$phy
comm_RAREFIED_rm2 <- RAREFIED_rm2_matches$comm

# Calculate the phylogenetic distances
phy_dist_RAREFIED_rm2 <- cophenetic(phy_RAREFIED_rm2)

  
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_RAREFIED_rm2 <- ses.mpd(comm_RAREFIED_rm2, phy_dist_RAREFIED_rm2, null.model = "independentswap", 
                                     abundance.weighted = FALSE, runs = 999)

WEIGHTED_sesMPD_indepswap_RAREFIED_rm2 <- ses.mpd(comm_RAREFIED_rm2, phy_dist_RAREFIED_rm2, null.model = "independentswap", 
                                     abundance.weighted = TRUE, runs = 999)


# Gather div info
rich_df <- filter(otu_alphadiv, measure == "Richness") %>%
  dplyr::select(norep_filter_name, mean, sd, frac_bacprod, SD_frac_bacprod, fracprod_per_cell_noinf)

invsimps_df <- filter(otu_alphadiv, measure == "Inverse_Simpson") %>%
  dplyr::select(norep_filter_name, mean, sd, frac_bacprod, SD_frac_bacprod, fracprod_per_cell_noinf)


# Prepare to be merged with each other 
unweighted_df <- unweighted_sesMPD_indepswap_RAREFIED_rm2 %>%
  rownames_to_column("names") %>%
  mutate(phylo_measure = "Unweighted_SESMPD") %>%
  make_metadata_norep() %>%
  mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree")) %>%
  rename(norep_filter_name = names) %>%
  left_join(rich_df, by = "norep_filter_name") %>%
  mutate(lakesite = factor(lakesite, levels = c("MOT", "MDP","MBR", "MIN")),
         lakesite = fct_recode(lakesite, Outlet = "MOT", Deep = "MDP", Bear = "MBR", River = "MIN"))
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining factor and character vector, coercing into character vector
weighted_df <- WEIGHTED_sesMPD_indepswap_RAREFIED_rm2 %>%
  rownames_to_column("names") %>%
  mutate(phylo_measure = "Weighted_SESMPD") %>%
  make_metadata_norep() %>%
  mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree")) %>%
  rename(norep_filter_name = names) %>%
  left_join(invsimps_df, by = "norep_filter_name") %>%
  mutate(lakesite = factor(lakesite, levels = c("MOT", "MDP","MBR", "MIN")),
         lakesite = fct_recode(lakesite, Outlet = "MOT", Deep = "MDP", Bear = "MBR", River = "MIN"))
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining factor and character vector, coercing into character vector

Figure 3: Unweighted ses.mpd

######################################################### SES MPD DISTRIBUTION: UNWEIGHTED  
unweighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = unweighted_df)
unweighted_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mpd.obs.z by fraction
## W = 30, p-value = 0.01449
## alternative hypothesis: true location shift is not equal to 0
unweighted_df %>%
  group_by(fraction) %>%
  summarize(mean(mpd.obs.z))
## # A tibble: 2 × 2
##   fraction `mean(mpd.obs.z)`
##     <fctr>             <dbl>
## 1 Particle        -0.4970703
## 2     Free         0.4375166
# Make a data frame to draw significance line in boxplot (visually calculated)
dat4 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree


unweight_distribution_plot <- 
  ggplot(unweighted_df, aes(y = mpd.obs.z, x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  xlab("Fraction") + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
  ##### Particle vs free per-cell production 
  geom_path(data = dat4, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=1.35, fontface = "bold",  size = 4, color = "#424645",
           label= paste("***\np =", round(unweighted_fraction_wilcox$p.value, digits = 2))) +
  theme(legend.position = "none",# axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()

# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(mean ~ mpd.obs.z, data = unweighted_df))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = unweighted_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -195.66  -96.12  -14.17   80.15  295.24 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   443.89      28.29  15.690 1.98e-13 ***
## mpd.obs.z    -124.90      34.37  -3.634  0.00147 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 138.5 on 22 degrees of freedom
## Multiple R-squared:  0.3751, Adjusted R-squared:  0.3467 
## F-statistic:  13.2 on 1 and 22 DF,  p-value: 0.001467
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -191.75 -112.16  -41.91   55.88  278.93 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   515.16      51.16  10.069 1.49e-06 ***
## mpd.obs.z     -83.76      49.91  -1.678    0.124    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 155 on 10 degrees of freedom
## Multiple R-squared:  0.2198, Adjusted R-squared:  0.1417 
## F-statistic: 2.816 on 1 and 10 DF,  p-value: 0.1242
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -100.15  -66.22  -18.72   43.66  172.93 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  337.075     42.750   7.885 1.34e-05 ***
## mpd.obs.z      3.083     77.507   0.040    0.969    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 90.18 on 10 degrees of freedom
## Multiple R-squared:  0.0001582,  Adjusted R-squared:  -0.09983 
## F-statistic: 0.001583 on 1 and 10 DF,  p-value: 0.9691
unweight_rich_vs_mpd_plot <- 
  ggplot(unweighted_df, aes(y = mean, x = mpd.obs.z)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21, aes(fill = fraction)) + ylab("Observed  Richness") +
  xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) + 
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  annotate("text", x = 0.75, y=750, color = "#424645", fontface = "bold",
           label = paste("R2 =", round(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$coefficients[,4][2]), digits = 3))) +
  theme(legend.title = element_blank(), legend.position = "none", 
        axis.text.x = element_blank(), axis.title.x = element_blank())


# Is there a relationship between Production and Unweighted Mean Pairwise distance?
#summary(lm(frac_bacprod ~ mpd.obs.z, data = unweighted_df)) # NS 

summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, 
##     fraction == "Particle"))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.144 -4.312 -1.822  3.403 19.527 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    8.213      2.617   3.139   0.0105 *
## mpd.obs.z     -3.510      2.553  -1.375   0.1991  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.928 on 10 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.07492 
## F-statistic: 1.891 on 1 and 10 DF,  p-value: 0.1991
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, 
##     fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.003 -14.934   2.196   8.756  36.997 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   18.183      8.396   2.166   0.0556 .
## mpd.obs.z     13.428     15.223   0.882   0.3984  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.71 on 10 degrees of freedom
## Multiple R-squared:  0.07219,    Adjusted R-squared:  -0.02059 
## F-statistic: 0.7781 on 1 and 10 DF,  p-value: 0.3984
unweight_prod_vs_mpd_plot <- 
  ggplot(unweighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + 
  ylab("Heterotrophic Production \n (μgC/L/hr)") +
  xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  theme(legend.title = element_blank(), legend.position = "none",
        axis.text.x = element_blank(), axis.title.x = element_blank())


# Is there a relationship between PER-CELL PRODUCTION and Unweighted Mean Pairwise distance?
unweight_vs_percell <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
summary(unweight_vs_percell)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7226 -0.2897  0.0040  0.1294  1.3193 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.1988     0.0999 -72.057  < 2e-16 ***
## mpd.obs.z    -0.4972     0.1205  -4.127  0.00048 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4779 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4478, Adjusted R-squared:  0.4215 
## F-statistic: 17.03 on 1 and 21 DF,  p-value: 0.0004795
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37972 -0.24190 -0.16526  0.04437  1.18153 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.9247     0.1711 -40.472 1.71e-11 ***
## mpd.obs.z    -0.3298     0.1627  -2.027   0.0733 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4649 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3135, Adjusted R-squared:  0.2372 
## F-statistic: 4.109 on 1 and 9 DF,  p-value: 0.07327
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63981 -0.29782  0.07682  0.17625  0.76499 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.55633    0.19602  -38.55 3.29e-12 ***
## mpd.obs.z   -0.04279    0.35539   -0.12    0.907    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4135 on 10 degrees of freedom
## Multiple R-squared:  0.001447,   Adjusted R-squared:  -0.09841 
## F-statistic: 0.01449 on 1 and 10 DF,  p-value: 0.9066
unweight_percell_vs_mpd_plot <- 
  ggplot(unweighted_df, 
       aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z)) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21, aes(fill = fraction)) + 
  ylab("log10(Production/cell)\n (μgC/cell/hr)") +
  xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  geom_smooth(method = "lm", color="#424645", fill = "#424645", alpha = 0.3) +
  #stat_smooth(method="lm", se=TRUE, formula= y ~ poly(x, 2, raw=TRUE), color="#424645", fill = "#424645", alpha = 0.3) +
  scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) + 
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  annotate("text", x = 0.75, y=-6, color = "#424645", fontface = "bold",
         label = paste("R2 =", round(summary(unweight_vs_percell)$adj.r.squared, digits = 2), "\n", 
                       "p =", round(unname(summary(unweight_vs_percell)$coefficients[,4][2]), digits = 4))) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))


plot_grid(unweight_distribution_plot, unweight_rich_vs_mpd_plot, unweight_prod_vs_mpd_plot, unweight_percell_vs_mpd_plot, 
          labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
          rel_heights = c(0.5, 0.8, 0.8, 1.2),
          align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Figure S6: Weighted ses.mpd

######################################################### SES MPD DISTRIBUTION: WEIGHTED 
weighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = weighted_df)
weighted_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mpd.obs.z by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(weighted_df) %>%
  group_by(fraction) %>%
  summarize(mean(mpd.obs.z))
## # A tibble: 2 × 2
##   fraction `mean(mpd.obs.z)`
##     <fctr>             <dbl>
## 1 Particle        -0.3607944
## 2     Free        -0.3854885
# Make a data frame to draw significance line in boxplot (visually calculated)
dat5 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree


weight_distribution_plot <- 
  ggplot(weighted_df, aes(y = mpd.obs.z, x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  xlab("Fraction") + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
  ##### Particle vs free per-cell production 
  geom_path(data = dat5, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=1.55, fontface = "bold",  size = 3.5, color = "#424645", label= "NS") +
  theme(legend.position = "none", #axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()


# Is there a relationship between inverse simpson and Weighted Mean Pairwise distance?
#summary(lm(mean ~ mpd.obs.z, data = weighted_df)) # NS
  
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction == 
##     "Particle"))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28.42 -19.15   0.40  12.95  40.67 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    23.58      12.61   1.870   0.0911 .
## mpd.obs.z     -32.98      29.43  -1.121   0.2886  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.57 on 10 degrees of freedom
## Multiple R-squared:  0.1116, Adjusted R-squared:  0.02276 
## F-statistic: 1.256 on 1 and 10 DF,  p-value: 0.2886
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction == 
##     "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0597  -4.3465  -0.8155   3.8832  18.4294 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   22.187      4.720   4.701  0.00084 ***
## mpd.obs.z     -4.942     10.486  -0.471  0.64753    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.441 on 10 degrees of freedom
## Multiple R-squared:  0.02173,    Adjusted R-squared:  -0.0761 
## F-statistic: 0.2221 on 1 and 10 DF,  p-value: 0.6475
weight_invsimps_vs_mpd_plot <- 
  ggplot(weighted_df, aes(y = mean, x = mpd.obs.z, fill = fraction)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + ylab("Inverse Simpson") +
  #xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  theme(legend.title = element_blank(), legend.position = "none",
        axis.text.x = element_blank(), axis.title.x = element_blank())




# Is there a relationship between PER-LITER PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(frac_bacprod ~ mpd.obs.z, data = weighted_df))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = weighted_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.030 -10.236  -2.842   5.937  38.409 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    9.008      5.664   1.590    0.126
## mpd.obs.z    -21.439     12.889  -1.663    0.110
## 
## Residual standard error: 14.66 on 22 degrees of freedom
## Multiple R-squared:  0.1117, Adjusted R-squared:  0.07133 
## F-statistic: 2.767 on 1 and 22 DF,  p-value: 0.1104
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, 
##     fraction == "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.240  -4.575  -1.717   4.503  15.352 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    4.391      4.126   1.064    0.312
## mpd.obs.z    -15.432      9.627  -1.603    0.140
## 
## Residual standard error: 7.711 on 10 degrees of freedom
## Multiple R-squared:  0.2044, Adjusted R-squared:  0.1249 
## F-statistic: 2.569 on 1 and 10 DF,  p-value: 0.14
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, 
##     fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.790 -12.275  -3.481   7.360  30.573 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   14.697      9.683   1.518    0.160
## mpd.obs.z    -24.285     21.512  -1.129    0.285
## 
## Residual standard error: 17.32 on 10 degrees of freedom
## Multiple R-squared:  0.113,  Adjusted R-squared:  0.02434 
## F-statistic: 1.274 on 1 and 10 DF,  p-value: 0.2853
weight_prod_vs_mpd_plot <- 
  ggplot(weighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + 
  ylab("Heterotrophic Production \n (μgC/L/hr)") +
  xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  theme(legend.title = element_blank(), legend.position = "none",
        axis.text.x = element_blank(), axis.title.x = element_blank())


# Is there a relationship between PER-CELL PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94765 -0.40616 -0.03619  0.31885  1.39101 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.5352     0.2442 -30.860   <2e-16 ***
## mpd.obs.z    -0.9519     0.5446  -1.748   0.0951 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6009 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.127,  Adjusted R-squared:  0.08544 
## F-statistic: 3.055 on 1 and 21 DF,  p-value: 0.09508
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65329 -0.32632 -0.03486  0.22224  0.95307 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.0845     0.3015 -23.496 2.18e-09 ***
## mpd.obs.z    -0.9337     0.6753  -1.383      0.2    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5096 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1752, Adjusted R-squared:  0.08357 
## F-statistic: 1.912 on 1 and 9 DF,  p-value: 0.2001
lm_percell_free_mpd <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free"))
summary(lm_percell_free_mpd)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54189 -0.16159 -0.00204  0.20070  0.44618 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.9519     0.1848 -43.019 1.11e-12 ***
## mpd.obs.z    -0.9777     0.4107  -2.381   0.0386 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3306 on 10 degrees of freedom
## Multiple R-squared:  0.3617, Adjusted R-squared:  0.2979 
## F-statistic: 5.667 on 1 and 10 DF,  p-value: 0.03857
weight_percell_vs_mpd_plot <- 
  ggplot(weighted_df, 
       aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z, fill = fraction)) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + 
  ylab("log10(Production/cell)\n (μgC/cell/hr)") +
  xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  geom_smooth(method = "lm", data = filter(weighted_df, fraction == "Free"), color = "skyblue", fill = "skyblue", alpha = 0.3) +
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
    scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) + 
  annotate("text", x = 0.3, y=-7.9, color = "skyblue", fontface = "bold",
     label = paste("R2 =", round(summary(lm_percell_free_mpd)$adj.r.squared, digits = 3), "\n", 
                   "p =", round(unname(summary(lm_percell_free_mpd)$coefficients[,4][2]), digits = 3))) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))


plot_grid(weight_distribution_plot, weight_invsimps_vs_mpd_plot, weight_prod_vs_mpd_plot, weight_percell_vs_mpd_plot, 
          labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
          rel_heights = c(0.5, 0.8, 0.8, 1.2),
          align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Figure S7

# Test by station
part_unweighted_df <- filter(unweighted_df, fraction == "Particle")
kruskal.test(mpd.obs.z ~ lakesite, data = filter(unweighted_df, fraction == "Particle"))
## 
##  Kruskal-Wallis rank sum test
## 
## data:  mpd.obs.z by lakesite
## Kruskal-Wallis chi-squared = 5.8718, df = 3, p-value = 0.118
unweighted_df %>%
 filter(fraction == "Particle") %>%
  group_by(lakesite) %>%
  summarize(mean(mpd.obs.z), sd(mpd.obs.z))
## # A tibble: 4 × 3
##   lakesite `mean(mpd.obs.z)` `sd(mpd.obs.z)`
##     <fctr>             <dbl>           <dbl>
## 1   Outlet         0.7950222       0.5856743
## 2     Deep        -0.9216060       0.3564753
## 3     Bear        -0.7817952       0.9425671
## 4    River        -1.0799021       0.2412376
# Lakesite 
plot_part_unweight_lakesite <- ggplot(filter(unweighted_df, fraction == "Particle"),
       aes(y = mpd.obs.z, x = lakesite)) +
  ggtitle("Particle-Associated Samples Only") + 
  scale_fill_manual(values = lakesite_colors) +
  ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  geom_jitter(size = 3, shape = 21, aes(fill = lakesite), width = 0.2) + 
  geom_boxplot(aes(fill = lakesite), alpha = 0.5) +
  theme(axis.title.x = element_blank(),
        legend.position = c(0.85, 0.9), legend.title = element_blank())

# Season
plot_part_unweight_season <- ggplot(filter(unweighted_df, fraction == "Particle"),
       aes(y = mpd.obs.z, x = season)) +
  ggtitle("Particle-Associated Samples Only") + 
  ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = season_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = season), width = 0.2) + 
  geom_boxplot(aes(fill = season), alpha = 0.5) + 
  theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
        legend.position = c(0.9, 0.9),  legend.title = element_blank())

plot_grid(plot_part_unweight_lakesite, plot_part_unweight_season, 
          labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")

Prepare Figure S5: Randomized Richness

#########################################################  Subset only richness data 
### These are the richness values for the fake samples 
#rich_stats <- filter(otu_alphadiv, measure == "Richness") %>%
#  dplyr::select(1:2) %>%
#  rename(mean_richness = mean) %>%
#  mutate(sample = paste("Sample_", seq(1:nrow(filter(otu_alphadiv, measure == "Richness"))), sep = ""),
#         mean_richness = matround(mean_richness))

## Pick OTUs to match these richness values 

  # List the otus from ALL samples 
#  all_otus <- taxa_names(surface_PAFL_otu_pruned_rm2)
  
  # Obtain the OTU table from the phyloseq object
#  otutab <- otu_table(surface_PAFL_otu_pruned_rm2)
  # Make all the counts to be 0
#  otutab_newvals <- apply(otutab, c(1, 2), function(x) 0)

  # Stop if things are wrong 
#  stopifnot(colnames(otutab_newvals) == all_otus)                       # Do the OTU names match?
#  stopifnot(rownames(otutab_newvals) == rich_stats$norep_filter_name)   # Do the sample names match?
  
# Make it reproducible!   
#set.seed(777)
  
#for (row in 1:nrow(rich_stats)) {
  
  # Pick the richness value 
#  rich_val <- rich_stats[row, 2]  
  
  # Sample the OTUs to represent that richness value 
#  col_index <- sample(ncol(otutab_newvals), rich_val, replace = FALSE, prob = NULL)
  
  # make all other columns 0
#  otutab_newvals[row, col_index] <- 1

#}


## Calculate the tree for those randomized samples 
# create a new phyloseq object 
#random_physeq_presab_raw <- phyloseq(otu_table(otutab_newvals, taxa_are_rows = FALSE), 
#                                 tax_table(surface_PAFL_otu_pruned_rm2), sample_data(surface_PAFL_otu_pruned_rm2))
#random_physeq_presab_raw

# Remove taxa that are 0!
#random_physeq_presab_pruned <- prune_taxa(taxa_sums(random_physeq_presab_raw) > 0, random_physeq_presab_raw) 
#random_physeq_presab_pruned

# Calculate tree 
# Obtain the OTU names that were retained in the dataset
#tax <- data.frame(tax_table(random_physeq_presab_pruned))
#vector_of_otus <- as.vector(tax$OTU)

# Write out the file for processing/fasttree
# write(vector_of_otus, file = "data/PhyloTree/randomized/random_physeq_presab_pruned_OTUnames.txt", ncolumns = 1, append = FALSE, sep = "\n")

Plot Figure S5: Randomized Richness

# Read in the tree
randomized_tree <- read.tree(file = "data/PhyloTree/randomized/newick_tree_randomized_rmN.tre")
  
  #random_physeq_presab_pruned_tree <- merge_phyloseq(random_physeq_presab_pruned, randomized_tree)
  #random_physeq_presab_pruned_tree
#save(list="random_physeq_presab_pruned_tree", file=paste0("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData")) 

load("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData")
random_physeq_presab_pruned_tree
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2911 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 2911 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2911 tips and 2909 internal nodes ]
# First force the OTU 
randomized_otu <- matrix(otu_table(random_physeq_presab_pruned_tree), nrow = nrow(otu_table(random_physeq_presab_pruned_tree)))
rownames(randomized_otu) <- sample_names(random_physeq_presab_pruned_tree)
colnames(randomized_otu) <- taxa_names(random_physeq_presab_pruned_tree)
    
  
## Calculate input for SES_MPD  
# Convert the presence/absence data to standardized abundanced  vegan function `decostand' , NOTE: method = "pa"
otu_decostand <- decostand(randomized_otu, method = "pa")
# check total abundance in each sample
apply(otu_decostand, 1, sum)
## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915 
##      906      574      434      268      256      358      493      447      476      276      284      381      840      632      586      452      383      511      505      343      444      274      238      381
# check for mismatches/missing species between community data and phylo tree
randomized_matches <- match.phylo.comm(randomized_tree, otu_decostand)
# the resulting object is a list with $phy and $comm elements.  replace our
# original data with the sorted/matched data
phy_randomized_rm2 <- randomized_matches$phy
comm_randomized_rm2 <- randomized_matches$comm

# Calculate the phylogenetic distances
phy_dist_randomized_rm2 <- cophenetic(phy_randomized_rm2)

  
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_randomized <- ses.mpd(comm_randomized_rm2, phy_dist_randomized_rm2, null.model = "independentswap", 
                                     abundance.weighted = FALSE, runs = 999)

df <- unweighted_sesMPD_indepswap_randomized

df$names <- row.names(df)
df_2 <- make_metadata_norep(df) %>%
  mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))
  
  
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(ntaxa ~ mpd.obs.z, data = df_2))
## 
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = df_2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -212.74 -129.44  -12.54   53.58  468.91 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   448.27      35.48  12.633 1.47e-11 ***
## mpd.obs.z      56.32      94.24   0.598    0.556    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 173.7 on 22 degrees of freedom
## Multiple R-squared:  0.01598,    Adjusted R-squared:  -0.02875 
## F-statistic: 0.3572 on 1 and 22 DF,  p-value: 0.5562
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Particle")))
## 
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -203.87 -109.15  -44.96   44.29  341.78 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   557.62      50.67  11.006 6.56e-07 ***
## mpd.obs.z     -33.22     140.62  -0.236    0.818    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 175 on 10 degrees of freedom
## Multiple R-squared:  0.00555,    Adjusted R-squared:  -0.0939 
## F-statistic: 0.05581 on 1 and 10 DF,  p-value: 0.818
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Free")))
## 
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -107.76  -48.80  -17.85   56.29  159.66 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   342.48      24.62  13.913 7.19e-08 ***
## mpd.obs.z      74.88      62.79   1.193    0.261    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 84.48 on 10 degrees of freedom
## Multiple R-squared:  0.1245, Adjusted R-squared:  0.03699 
## F-statistic: 1.422 on 1 and 10 DF,  p-value: 0.2605
ggplot(df_2, aes(y = ntaxa, x = mpd.obs.z, fill = fraction)) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + ylab("Randomized Richness") +
  xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  scale_x_continuous(limits = c(-1,1)) + 
  theme(legend.title = element_blank(), legend.position = c(0.15, 0.9))